{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "52824b89-532a-4e54-87e9-1410813cd39e",
   "metadata": {},
   "source": [
    "# LangChain: Evaluation\n",
    "\n",
    "## Outline:\n",
    "\n",
    "* Example generation\n",
    "* Manual evaluation (and debuging)\n",
    "* LLM-assisted evaluation\n",
    "* LangChain evaluation platform"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "b7ed03ed-1322-49e3-b2a2-33e94fb592ef",
   "metadata": {
    "height": 81,
    "tags": []
   },
   "outputs": [],
   "source": [
    "import os\n",
    "\n",
    "from dotenv import load_dotenv, find_dotenv\n",
    "_ = load_dotenv(find_dotenv()) # read local .env file"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "945d8abb-ba55-40a4-a3c5-6ad0dab73e3e",
   "metadata": {},
   "source": [
    "Note: LLM's do not always produce the same results. When executing the code in your notebook, you may get slightly different answers that those in the video."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "24ff81cd-dce0-4344-8d45-4a98fd3a87c9",
   "metadata": {
    "height": 234
   },
   "outputs": [],
   "source": [
    "# account for deprecation of LLM model\n",
    "import datetime\n",
    "# Get the current date\n",
    "current_date = datetime.datetime.now().date()\n",
    "\n",
    "# Define the date after which the model should be set to \"gpt-3.5-turbo\"\n",
    "target_date = datetime.date(2024, 6, 12)\n",
    "\n",
    "# Set the model variable based on the current date\n",
    "if current_date > target_date:\n",
    "    llm_model = \"gpt-3.5-turbo\"\n",
    "else:\n",
    "    llm_model = \"gpt-3.5-turbo-0301\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "28008949",
   "metadata": {},
   "source": [
    "## Create our QandA application"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "974acf8e-8f88-42de-88f8-40a82cb58e8b",
   "metadata": {
    "height": 98
   },
   "outputs": [],
   "source": [
    "from langchain.chains import RetrievalQA\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.document_loaders import CSVLoader\n",
    "from langchain.indexes import VectorstoreIndexCreator\n",
    "from langchain.vectorstores import DocArrayInMemorySearch"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "9ec1106d",
   "metadata": {
    "height": 64
   },
   "outputs": [],
   "source": [
    "file = 'OutdoorClothingCatalog_1000.csv'\n",
    "loader = CSVLoader(file_path=file)\n",
    "data = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "b31c218f",
   "metadata": {
    "height": 81
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "VectorStoreIndexWrapper(vectorstore=<langchain.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x7f6620311e20>)"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "index = VectorstoreIndexCreator(\n",
    "    vectorstore_cls=DocArrayInMemorySearch\n",
    ").from_loaders([loader])\n",
    "index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "a2006054",
   "metadata": {
    "height": 200
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "RetrievalQA(memory=None, callbacks=None, callback_manager=None, verbose=True, combine_documents_chain=StuffDocumentsChain(memory=None, callbacks=None, callback_manager=None, verbose=False, input_key='input_documents', output_key='output_text', llm_chain=LLMChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=ChatPromptTemplate(input_variables=['context', 'question'], output_parser=None, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], output_parser=None, partial_variables={}, template=\"Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\n{context}\", template_format='f-string', validate_template=True), additional_kwargs={}), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], output_parser=None, partial_variables={}, template='{question}', template_format='f-string', validate_template=True), additional_kwargs={})]), llm=ChatOpenAI(verbose=False, callbacks=None, callback_manager=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.0, model_kwargs={}, openai_api_key=None, openai_api_base=None, openai_organization=None, request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None), output_key='text'), document_prompt=PromptTemplate(input_variables=['page_content'], output_parser=None, partial_variables={}, template='{page_content}', template_format='f-string', validate_template=True), document_variable_name='context', document_separator='<<<<>>>>>'), input_key='query', output_key='result', return_source_documents=False, retriever=VectorStoreRetriever(vectorstore=<langchain.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x7f6620311e20>, search_type='similarity', search_kwargs={}))"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "llm = ChatOpenAI(temperature = 0.0, model=llm_model)\n",
    "qa = RetrievalQA.from_chain_type(\n",
    "    llm=llm, \n",
    "    chain_type=\"stuff\", \n",
    "    retriever=index.vectorstore.as_retriever(), \n",
    "    verbose=True,\n",
    "    chain_type_kwargs = {\n",
    "        \"document_separator\": \"<<<<>>>>>\"\n",
    "    }\n",
    ")\n",
    "qa"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "791ebd73",
   "metadata": {},
   "source": [
    "### Coming up with test datapoints"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "fb04a0f9",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(page_content=\": 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.\", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 10})"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "fe4a88c2",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(page_content=': 11\\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this jacket is suitable for light activity up to 20° and moderate activity up to -30°. The soft and durable 100% polyester shell offers complete windproof protection and is insulated with warm, lofty goose down. Other features include welded baffles for a no-stitch construction and excellent stretch, an adjustable hood, an interior media port and mesh stash pocket and a hem drawcord. Machine wash and dry. Imported.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 11})"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[11]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8d548aef",
   "metadata": {},
   "source": [
    "### Hard-coded examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c2d59bf2",
   "metadata": {
    "height": 217
   },
   "outputs": [],
   "source": [
    "examples = [\n",
    "    {\n",
    "        \"query\": \"Do the Cozy Comfort Pullover Set\\\n",
    "        have side pockets?\",\n",
    "        \"answer\": \"Yes\"\n",
    "    },\n",
    "    {\n",
    "        \"query\": \"What collection is the Ultra-Lofty \\\n",
    "        850 Stretch Down Hooded Jacket from?\",\n",
    "        \"answer\": \"The DownTek collection\"\n",
    "    }\n",
    "]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c7ce3e4f",
   "metadata": {},
   "source": [
    "### LLM-Generated examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "d44f8376",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "from langchain.evaluation.qa import QAGenerateChain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "34e87816",
   "metadata": {
    "height": 47
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "QAGenerateChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=PromptTemplate(input_variables=['doc'], output_parser=RegexParser(regex='QUESTION: (.*?)\\\\nANSWER: (.*)', output_keys=['query', 'answer'], default_output_key=None), partial_variables={}, template='You are a teacher coming up with questions to ask on a quiz. \\nGiven the following document, please generate a question and answer based on that document.\\n\\nExample Format:\\n<Begin Document>\\n...\\n<End Document>\\nQUESTION: question here\\nANSWER: answer here\\n\\nThese questions should be detailed and be based explicitly on information in the document. Begin!\\n\\n<Begin Document>\\n{doc}\\n<End Document>', template_format='f-string', validate_template=True), llm=ChatOpenAI(verbose=False, callbacks=None, callback_manager=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.7, model_kwargs={}, openai_api_key=None, openai_api_base=None, openai_organization=None, request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None), output_key='text')"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI(model=llm_model))\n",
    "example_gen_chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "31fb5bd0-fc2a-478c-9a09-6fe7e3307241",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "# the warning below can be safely ignored"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "62abae09",
   "metadata": {
    "height": 81
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'query': \" According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\\n\\n\",\n",
       "  'answer': \" The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\"},\n",
       " {'query': ' What are the dimensions available for the Recycled Waterhog Dog Mat, Chevron Weave?\\n\\n',\n",
       "  'answer': ' The small size has dimensions of 18\" x 28\" and the medium size has dimensions of 22.5\" x 34.5\".'},\n",
       " {'query': \" What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\\n\\n\",\n",
       "  'answer': \" The key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage.\"},\n",
       " {'query': ' What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?\\n\\n',\n",
       "  'answer': ' The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is composed of 90% recycled nylon and 10% Lycra® spandex.'},\n",
       " {'query': ' What technology is featured in the EcoFlex 3L Storm Pants that makes them more breathable?\\n\\n',\n",
       "  'answer': ' The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested.'}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_examples = example_gen_chain.apply_and_parse(\n",
    "    [{\"doc\": t} for t in data[:5]]\n",
    ")\n",
    "new_examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "97ab28b5",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'query': \" According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\\n\\n\",\n",
       " 'answer': \" The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\"}"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "new_examples[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "0ebe4228",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Document(page_content=\": 0\\nname: Women's Campside Oxfords\\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \\n\\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \\n\\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \\n\\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \\n\\nQuestions? Please contact us for any inquiries.\", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "faf25f2f",
   "metadata": {},
   "source": [
    "### Combine examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "ada2a3fc",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "examples += new_examples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "9cdf5cf5",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Yes, the Cozy Comfort Pullover Set does have side pockets.'"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qa.run(examples[0][\"query\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "63f3cb08",
   "metadata": {},
   "source": [
    "## Manual Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "fcaf622e",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "import langchain\n",
    "langchain.debug = True"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "8a142638",
   "metadata": {
    "height": 30
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQA] Entering Chain run with input:\n",
      "\u001b[0m{\n",
      "  \"query\": \"Do the Cozy Comfort Pullover Set        have side pockets?\"\n",
      "}\n",
      "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain] Entering Chain run with input:\n",
      "\u001b[0m[inputs]\n",
      "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] Entering Chain run with input:\n",
      "\u001b[0m{\n",
      "  \"question\": \"Do the Cozy Comfort Pullover Set        have side pockets?\",\n",
      "  \"context\": \": 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.<<<<>>>>>: 73\\nname: Cozy Cuddles Knit Pullover Set\\ndescription: Perfect for lounging, this knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out. \\n\\nSize & Fit \\nPants are Favorite Fit: Sits lower on the waist. \\nRelaxed Fit: Our most generous fit sits farthest from the body. \\n\\nFabric & Care \\nIn the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features \\nRelaxed fit top with raglan sleeves and rounded hem. \\nPull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg. \\nImported.<<<<>>>>>: 632\\nname: Cozy Comfort Fleece Pullover\\ndescription: The ultimate sweater fleece \\u2013 made from superior fabric and offered at an unbeatable price. \\n\\nSize & Fit\\nSlightly Fitted: Softly shapes the body. Falls at hip. \\n\\nWhy We Love It\\nOur customers (and employees) love the rugged construction and heritage-inspired styling of our popular Sweater Fleece Pullover and wear it for absolutely everything. From high-intensity activities to everyday tasks, you'll find yourself reaching for it every time.\\n\\nFabric & Care\\nRugged sweater-knit exterior and soft brushed interior for exceptional warmth and comfort. Made from soft, 100% polyester. Machine wash and dry.\\n\\nAdditional Features\\nFeatures our classic Mount Katahdin logo. Snap placket. Front princess seams create a feminine shape. Kangaroo handwarmer pockets. Cuffs and hem reinforced with jersey binding. Imported.\\n\\n \\u2013 Official Supplier to the U.S. Ski Team\\nTHEIR WILL TO WIN, WOVEN RIGHT IN. LEARN MORE<<<<>>>>>: 151\\nname: Cozy Quilted Sweatshirt\\ndescription: Our sweatshirt is an instant classic with its great quilted texture and versatile weight that easily transitions between seasons. With a traditional fit that is relaxed through the chest, sleeve, and waist, this pullover is lightweight enough to be worn most months of the year. The cotton blend fabric is super soft and comfortable, making it the perfect casual layer. To make dressing easy, this sweatshirt also features a snap placket and a heritage-inspired Mt. Katahdin logo patch. For care, machine wash and dry. Imported.\"\n",
      "}\n",
      "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain > 4:llm:ChatOpenAI] Entering LLM run with input:\n",
      "\u001b[0m{\n",
      "  \"prompts\": [\n",
      "    \"System: Use the following pieces of context to answer the users question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\n: 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.<<<<>>>>>: 73\\nname: Cozy Cuddles Knit Pullover Set\\ndescription: Perfect for lounging, this knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out. \\n\\nSize & Fit \\nPants are Favorite Fit: Sits lower on the waist. \\nRelaxed Fit: Our most generous fit sits farthest from the body. \\n\\nFabric & Care \\nIn the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features \\nRelaxed fit top with raglan sleeves and rounded hem. \\nPull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg. \\nImported.<<<<>>>>>: 632\\nname: Cozy Comfort Fleece Pullover\\ndescription: The ultimate sweater fleece \\u2013 made from superior fabric and offered at an unbeatable price. \\n\\nSize & Fit\\nSlightly Fitted: Softly shapes the body. Falls at hip. \\n\\nWhy We Love It\\nOur customers (and employees) love the rugged construction and heritage-inspired styling of our popular Sweater Fleece Pullover and wear it for absolutely everything. From high-intensity activities to everyday tasks, you'll find yourself reaching for it every time.\\n\\nFabric & Care\\nRugged sweater-knit exterior and soft brushed interior for exceptional warmth and comfort. Made from soft, 100% polyester. Machine wash and dry.\\n\\nAdditional Features\\nFeatures our classic Mount Katahdin logo. Snap placket. Front princess seams create a feminine shape. Kangaroo handwarmer pockets. Cuffs and hem reinforced with jersey binding. Imported.\\n\\n \\u2013 Official Supplier to the U.S. Ski Team\\nTHEIR WILL TO WIN, WOVEN RIGHT IN. LEARN MORE<<<<>>>>>: 151\\nname: Cozy Quilted Sweatshirt\\ndescription: Our sweatshirt is an instant classic with its great quilted texture and versatile weight that easily transitions between seasons. With a traditional fit that is relaxed through the chest, sleeve, and waist, this pullover is lightweight enough to be worn most months of the year. The cotton blend fabric is super soft and comfortable, making it the perfect casual layer. To make dressing easy, this sweatshirt also features a snap placket and a heritage-inspired Mt. Katahdin logo patch. For care, machine wash and dry. Imported.\\nHuman: Do the Cozy Comfort Pullover Set        have side pockets?\"\n",
      "  ]\n",
      "}\n",
      "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain > 4:llm:ChatOpenAI] [18.02ms] Exiting LLM run with output:\n",
      "\u001b[0m{\n",
      "  \"generations\": [\n",
      "    [\n",
      "      {\n",
      "        \"text\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\",\n",
      "        \"generation_info\": null,\n",
      "        \"message\": {\n",
      "          \"content\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\",\n",
      "          \"additional_kwargs\": {},\n",
      "          \"example\": false\n",
      "        }\n",
      "      }\n",
      "    ]\n",
      "  ],\n",
      "  \"llm_output\": {\n",
      "    \"token_usage\": {\n",
      "      \"prompt_tokens\": 732,\n",
      "      \"completion_tokens\": 14,\n",
      "      \"total_tokens\": 746\n",
      "    },\n",
      "    \"model_name\": \"gpt-3.5-turbo\"\n",
      "  }\n",
      "}\n",
      "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain > 3:chain:LLMChain] [18.523ms] Exiting Chain run with output:\n",
      "\u001b[0m{\n",
      "  \"text\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\"\n",
      "}\n",
      "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQA > 2:chain:StuffDocumentsChain] [18.948ms] Exiting Chain run with output:\n",
      "\u001b[0m{\n",
      "  \"output_text\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\"\n",
      "}\n",
      "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[1:chain:RetrievalQA] [45.053000000000004ms] Exiting Chain run with output:\n",
      "\u001b[0m{\n",
      "  \"result\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\"\n",
      "}\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'Yes, the Cozy Comfort Pullover Set does have side pockets.'"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "qa.run(examples[0][\"query\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "b3d6bef0",
   "metadata": {
    "height": 47
   },
   "outputs": [],
   "source": [
    "# Turn off the debug mode\n",
    "langchain.debug = False"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d5bdbdce",
   "metadata": {},
   "source": [
    "## LLM assisted evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "a4dca05a",
   "metadata": {
    "height": 47
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n",
      "\n",
      "\n",
      "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n",
      "\n",
      "\u001b[1m> Finished chain.\u001b[0m\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(list,\n",
       " [{'query': 'Do the Cozy Comfort Pullover Set        have side pockets?',\n",
       "   'answer': 'Yes',\n",
       "   'result': 'Yes, the Cozy Comfort Pullover Set does have side pockets.'},\n",
       "  {'query': 'What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?',\n",
       "   'answer': 'The DownTek collection',\n",
       "   'result': 'The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.'},\n",
       "  {'query': \" According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\\n\\n\",\n",
       "   'answer': \" The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\",\n",
       "   'result': \"The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\"},\n",
       "  {'query': ' What are the dimensions available for the Recycled Waterhog Dog Mat, Chevron Weave?\\n\\n',\n",
       "   'answer': ' The small size has dimensions of 18\" x 28\" and the medium size has dimensions of 22.5\" x 34.5\".',\n",
       "   'result': 'The dimensions available for the Recycled Waterhog Dog Mat, Chevron Weave are:\\n- Small: 18\" x 28\"\\n- Medium: 22.5\" x 34.5\"'},\n",
       "  {'query': \" What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\\n\\n\",\n",
       "   'answer': \" The key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage.\",\n",
       "   'result': \"Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece are:\\n- Bright colors, ruffles, and exclusive whimsical prints\\n- Four-way-stretch and chlorine-resistant fabric\\n- UPF 50+ rated fabric for high sun protection\\n- Crossover no-slip straps for a secure fit\\n- Fully lined bottom for maximum coverage\\n- Machine washable and line dry for best results\"},\n",
       "  {'query': ' What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?\\n\\n',\n",
       "   'answer': ' The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is composed of 90% recycled nylon and 10% Lycra® spandex.',\n",
       "   'result': 'The fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top is 82% recycled nylon with 18% Lycra® spandex for the body, and 90% recycled nylon with 10% Lycra® spandex for the lining.'},\n",
       "  {'query': ' What technology is featured in the EcoFlex 3L Storm Pants that makes them more breathable?\\n\\n',\n",
       "   'answer': ' The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested.',\n",
       "   'result': 'The EcoFlex 3L Storm Pants feature TEK O2 technology, which is designed to offer the most breathability tested by the brand.'}])"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predictions = qa.apply(examples)\n",
    "type(predictions), predictions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "6012a3e0",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": [
    "from langchain.evaluation.qa import QAEvalChain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "724b1c0b",
   "metadata": {
    "height": 64
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "QAEvalChain(memory=None, callbacks=None, callback_manager=None, verbose=False, prompt=PromptTemplate(input_variables=['query', 'result', 'answer'], output_parser=None, partial_variables={}, template=\"You are a teacher grading a quiz.\\nYou are given a question, the student's answer, and the true answer, and are asked to score the student answer as either CORRECT or INCORRECT.\\n\\nExample Format:\\nQUESTION: question here\\nSTUDENT ANSWER: student's answer here\\nTRUE ANSWER: true answer here\\nGRADE: CORRECT or INCORRECT here\\n\\nGrade the student answers based ONLY on their factual accuracy. Ignore differences in punctuation and phrasing between the student answer and true answer. It is OK if the student answer contains more information than the true answer, as long as it does not contain any conflicting statements. Begin! \\n\\nQUESTION: {query}\\nSTUDENT ANSWER: {result}\\nTRUE ANSWER: {answer}\\nGRADE:\", template_format='f-string', validate_template=True), llm=ChatOpenAI(verbose=False, callbacks=None, callback_manager=None, client=<class 'openai.api_resources.chat_completion.ChatCompletion'>, model_name='gpt-3.5-turbo', temperature=0.0, model_kwargs={}, openai_api_key=None, openai_api_base=None, openai_organization=None, request_timeout=None, max_retries=6, streaming=False, n=1, max_tokens=None), output_key='text')"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "llm = ChatOpenAI(temperature=0, model=llm_model)\n",
    "eval_chain = QAEvalChain.from_llm(llm)\n",
    "eval_chain"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "8b46ae55",
   "metadata": {
    "height": 47
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{'text': 'CORRECT'},\n",
       " {'text': 'CORRECT'},\n",
       " {'text': 'CORRECT'},\n",
       " {'text': 'CORRECT'},\n",
       " {'text': 'CORRECT'},\n",
       " {'text': 'CORRECT'},\n",
       " {'text': 'CORRECT'}]"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "graded_outputs = eval_chain.evaluate(examples, predictions)\n",
    "graded_outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "3437cfbe",
   "metadata": {
    "height": 132
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Example 0:\n",
      "Question: Do the Cozy Comfort Pullover Set        have side pockets?\n",
      "Real Answer: Yes\n",
      "Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.\n",
      "Predicted Grade: CORRECT\n",
      "\n",
      "Example 1:\n",
      "Question: What collection is the Ultra-Lofty         850 Stretch Down Hooded Jacket from?\n",
      "Real Answer: The DownTek collection\n",
      "Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.\n",
      "Predicted Grade: CORRECT\n",
      "\n",
      "Example 2:\n",
      "Question:  According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\n",
      "\n",
      "\n",
      "Real Answer:  The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\n",
      "Predicted Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\n",
      "Predicted Grade: CORRECT\n",
      "\n",
      "Example 3:\n",
      "Question:  What are the dimensions available for the Recycled Waterhog Dog Mat, Chevron Weave?\n",
      "\n",
      "\n",
      "Real Answer:  The small size has dimensions of 18\" x 28\" and the medium size has dimensions of 22.5\" x 34.5\".\n",
      "Predicted Answer: The dimensions available for the Recycled Waterhog Dog Mat, Chevron Weave are:\n",
      "- Small: 18\" x 28\"\n",
      "- Medium: 22.5\" x 34.5\"\n",
      "Predicted Grade: CORRECT\n",
      "\n",
      "Example 4:\n",
      "Question:  What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\n",
      "\n",
      "\n",
      "Real Answer:  The key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage.\n",
      "Predicted Answer: Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece are:\n",
      "- Bright colors, ruffles, and exclusive whimsical prints\n",
      "- Four-way-stretch and chlorine-resistant fabric\n",
      "- UPF 50+ rated fabric for high sun protection\n",
      "- Crossover no-slip straps for a secure fit\n",
      "- Fully lined bottom for maximum coverage\n",
      "- Machine washable and line dry for best results\n",
      "Predicted Grade: CORRECT\n",
      "\n",
      "Example 5:\n",
      "Question:  What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?\n",
      "\n",
      "\n",
      "Real Answer:  The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is composed of 90% recycled nylon and 10% Lycra® spandex.\n",
      "Predicted Answer: The fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top is 82% recycled nylon with 18% Lycra® spandex for the body, and 90% recycled nylon with 10% Lycra® spandex for the lining.\n",
      "Predicted Grade: CORRECT\n",
      "\n",
      "Example 6:\n",
      "Question:  What technology is featured in the EcoFlex 3L Storm Pants that makes them more breathable?\n",
      "\n",
      "\n",
      "Real Answer:  The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested.\n",
      "Predicted Answer: The EcoFlex 3L Storm Pants feature TEK O2 technology, which is designed to offer the most breathability tested by the brand.\n",
      "Predicted Grade: CORRECT\n",
      "\n"
     ]
    }
   ],
   "source": [
    "for i, eg in enumerate(examples):\n",
    "    print(f\"Example {i}:\")\n",
    "    print(\"Question: \" + predictions[i]['query'])\n",
    "    print(\"Real Answer: \" + predictions[i]['answer'])\n",
    "    print(\"Predicted Answer: \" + predictions[i]['result'])\n",
    "    print(\"Predicted Grade: \" + graded_outputs[i]['text'])\n",
    "    print()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fad0ddd1",
   "metadata": {},
   "source": [
    "## LangChain evaluation platform"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ef63bb24",
   "metadata": {},
   "source": [
    "The LangChain evaluation platform, LangChain Plus, can be accessed here https://www.langchain.plus/.  \n",
    "Use the invite code `lang_learners_2023`"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "35b95a2e-3bc7-429a-83af-387239e7f2a1",
   "metadata": {},
   "source": [
    "Reminder: Download your notebook to you local computer to save your work."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be5b2aae",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "319798ba",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "89a504ad",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "dedd758b",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "36885b20",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "65c6cfb6",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9ad3c7cc",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26ac493e",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f94cdacd",
   "metadata": {
    "height": 30
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.19"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
